setwd("..")
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
library(stargazer)

#Chinese input models
baichuan13b_temp0.3 <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_chn.csv")
chatglm_temp0.8 <- read.csv("data/intermediate/censorship_chatglm_temp0.8_chn.csv")
ernie_api_temp0.01 <- read.csv("data/intermediate/censorship_ernie_api_temp0.01_chn.csv")
ernie_api_temp0.95 <- read.csv("data/intermediate/censorship_ernie_api_temp0.95_chn.csv")
ernie_api_temp1.0 <- read.csv("data/intermediate/censorship_ernie_api_temp1.0_chn.csv")
gpt3.5_temp0 <- read.csv("data/intermediate/censorship_gpt3.5_temp0_chn.csv")
gpt3.5_temp0.7 <- read.csv("data/intermediate/censorship_gpt3.5_temp0.7_chn.csv")
gpt4_temp0 <- read.csv("data/intermediate/censorship_gpt4_temp0_chn.csv")
gpt4_temp0.7 <- read.csv("data/intermediate/censorship_gpt4_temp0.7_chn.csv")
llama2_temp0.6 <- read.csv("data/intermediate/censorship_llama2_temp0.6_chnengtrans.csv")
llama2_uncensored_temp1.0 <- read.csv("data/intermediate/censorship_llama2_uncensored_temp1.0_chnengtrans.csv")
deepseek_temp0 <- read.csv("data/intermediate/censorship_deepseek_temp0_chn.csv")
deepseek_temp0.7 <- read.csv("data/intermediate/censorship_deepseek_temp0.7_chn.csv")
gpt4o_temp0 <- read.csv("data/intermediate/censorship_gpt4o_temp0_chn.csv")
gpt4o_temp0.7 <- read.csv("data/intermediate/censorship_gpt4o_temp0.7_chn.csv")



#English input models (translated into Chinese)
baichuan13b_temp0.3eng <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_engtrans.csv")
chatglm_temp0.8eng <- read.csv("data/intermediate/censorship_chatglm_temp0.8_engtrans.csv")
gpt3.5_temp0eng <- read.csv("data/intermediate/censorship_gpt3.5_temp0_engtrans.csv")
gpt3.5_temp0.7eng <- read.csv("data/intermediate/censorship_gpt3.5_temp0.7_engtrans.csv")
gpt4_temp0eng <- read.csv("data/intermediate/censorship_gpt4_temp0_engtrans.csv")
gpt4_temp0.7eng <- read.csv("data/intermediate/censorship_gpt4_temp0.7_engtrans.csv")
llama2_temp0.6eng <- read.csv("data/intermediate/censorship_llama2_temp0.6_engtrans.csv")
llama2_uncensored_temp1.0eng <- read.csv("data/intermediate/censorship_llama2_uncensored_temp1.0_engtrans.csv")
deepseek_temp0eng <- read.csv("data/intermediate/censorship_deepseek_temp0_engtrans.csv")
deepseek_temp0.7eng <- read.csv("data/intermediate/censorship_deepseek_temp0.7_engtrans.csv")
gpt4o_temp0eng <- read.csv("data/intermediate/censorship_gpt4o_temp0_engtrans.csv")
gpt4o_temp0.7eng <- read.csv("data/intermediate/censorship_gpt4o_temp0.7_engtrans.csv")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0", 
                   "gpt3.5_temp0", "gpt3.5_temp0.7", "gpt4_temp0", "gpt4_temp0.7", "llama2_temp0.6", "llama2_uncensored_temp1.0",
                   "deepseek_temp0", "deepseek_temp0.7", "gpt4o_temp0", "gpt4o_temp0.7", 
                   "baichuan13b_temp0.3eng", "chatglm_temp0.8eng", "gpt3.5_temp0eng", "gpt3.5_temp0.7eng", "gpt4_temp0eng", 
                   "gpt4_temp0.7eng", "llama2_temp0.6eng", "llama2_uncensored_temp1.0eng",
                   "deepseek_temp0eng", "deepseek_temp0.7eng", "gpt4o_temp0eng", "gpt4o_temp0.7eng")

#Pre-processing: counts, etc.
mode_function <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  conv_cols <- df %>%
    select(starts_with("conv")) %>%
    select(where(is.numeric))
  
  # Calculate the sum of columns starting with "conv" for each row
  df$mean_conv <- rowMeans(conv_cols, na.rm = TRUE) 
  
  # Calculate the maximum of columns starting with "conv" for each row
  df$max_conv <- do.call(pmax, c(conv_cols, na.rm = TRUE))
  
  # Calculate the counts of the most frequent responses of columns starting with "conv" for each row
  df$mode_conv <- apply(conv_cols, 1, mode_function)
  
  # Assign the modified data frame back to its original name
  assign(dataset_names[[i]], df)
}


##-------------------------------------------------------------------------------------------------
##---------------------------Compare models with Chinese input-------------------------------------
##-------------------------------------------------------------------------------------------------
datasets <- list(baichuan13b_temp0.3,baichuan13b_temp0.3eng, chatglm_temp0.8, chatglm_temp0.8eng, ernie_api_temp0.01,
                 ernie_api_temp0.95,ernie_api_temp1.0, deepseek_temp0, deepseek_temp0eng, deepseek_temp0.7, deepseek_temp0.7eng,
                 llama2_temp0.6, llama2_temp0.6eng, llama2_uncensored_temp1.0, llama2_uncensored_temp1.0eng,
                 gpt3.5_temp0, gpt3.5_temp0eng, gpt3.5_temp0.7, gpt3.5_temp0.7eng, gpt4_temp0, gpt4_temp0eng, gpt4_temp0.7, gpt4_temp0.7eng,
                 gpt4o_temp0, gpt4o_temp0eng, gpt4o_temp0.7, gpt4o_temp0.7eng)

dataset_names <- c("BaiChuan: T0.3", "BaiChuan(Eng): T0.3", "ChatGLM: T0.8", "ChatGLM(Eng): T0.8", "Ernie Bot: T0.01", 
                   "Ernie Bot: T0.95", "Ernie Bot: T1.0", "DeepSeek: T0", "DeepSeek(Eng): T0", "DeepSeek: T0.7", "DeepSeek(Eng): T0.7", 
                   "Llama2: T0.6", "Llama2(Eng): T0.6", "Llama2-unc.: T1.0", "Llama2-unc.(Eng): T1.0", 
                   "GPT3.5: T0", "GPT3.5(Eng): T0", "GPT3.5: T0.7", "GPT3.5(Eng): T0.7",
                   "GPT4: T0", "GPT4(Eng): T0", "GPT4: T0.7", "GPT4(Eng): T0.7", 
                   "GPT4o: T0", "GPT4o(Eng): T0", "GPT4o: T0.7", "GPT4o(Eng): T0.7")

##---------------------------Refusal to Respond Rate-------------------------------------

# Initialize storage for results
results <- data.frame(dataset = character(),
                      mean = numeric(),
                      std_error = numeric(),
                      stringsAsFactors = FALSE)

# Loop through the datasets
for (i in 1:length(datasets)) {
  data <- datasets[[i]]
  mean_val <- mean(data$per_nonresponse)
  std_error <- sd(data$per_nonresponse) / sqrt(nrow(data))
  
  # Store results
  results <- rbind(results, data.frame(dataset = dataset_names[i],
                                       mean = mean_val,
                                       std_error = std_error))
}

results$dataset <- factor(results$dataset, levels = dataset_names)

results$type <- c("China:Ch","China:En","China:Ch","China:En","China:Ch","China:Ch","China:Ch",
                  "China:Ch","China:En","China:Ch","China:En", "Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En", "Non-China:Ch","Non-China:En")

##---------------------Remove English Prompts for Plots---------------------------
results_filtered <- results %>%
  filter(!grepl("\\(Eng\\)", dataset))
results_filtered$dataset <- droplevels(results_filtered$dataset)

results_filtered$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", "Non-China", "Non-China", 
                            "Non-China", "Non-China", "Non-China", "Non-China")

#########Plot Figure 1#########
pdf("graph/nonrespcombine.pdf",7,7)
ggplot(results_filtered, aes(x = mean, y = dataset, fill = type)) +
  geom_bar(stat = "identity", orientation = "y") +
  geom_errorbar(aes(xmin = mean - std_error, xmax = mean + std_error), width = 0.2) +
  geom_text(aes(label = sprintf("%.2f", mean)), hjust = ifelse(results_filtered$mean > 30, -0.75, -0.75)) +
  scale_fill_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_x_continuous(limits = c(0, 70)) + 
  scale_y_discrete(limits = rev(levels(results_filtered$dataset))) +
  labs(title = "Refusal to Respond Rate (Percentage)",
       y = "Name of LLM and Temperature",
       x = NULL) +
  theme_bw() +
  theme(axis.text.y = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.65), "cm"),
        axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
        axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
        legend.position = "bottom",  # Move legend to bottom
        legend.direction = "horizontal",
        legend.text = element_text(size = 12)) +  # Arrange legend horizontally
  guides(fill = guide_legend(nrow = 1))  # Arrange legend items in a single row
dev.off()

##---------------------Generate A Variable for Prompt Group---------------------------
resultsbygroup <- results %>%
  mutate(group = ifelse(str_detect(dataset, "\\(Eng\\)"), "English", "Chinese"))

# Extract base model names (without temperature values and language tags)
resultsbygroup  <- resultsbygroup  %>%
  mutate(Base_Model = str_replace_all(dataset, "\\(Eng\\)", "")   )  # Remove (Eng)# Remove temperature values

##---------------------Generate A Variable for Country Group---------------------------
resultsbygroup <- resultsbygroup %>% 
  separate(type, into = c("country", "language"), sep = ":")

#########Plot Figure 4#########
pdf("graph/enchcomparison1.pdf",7,7)
ggplot(resultsbygroup, aes(x = mean, y=Base_Model, group = group, color = group)) +
  #geom_line(size = 1, orientation = "y") +                    # Draw lines connecting points
  geom_line(aes(linetype = group), size = 0.6, orientation = "y", color="black") +
  geom_point(size = 3, aes(color = country)) +                    # Add points
  geom_errorbarh(aes(xmin = mean - std_error, xmax = mean + std_error,color = country), size = 1, height = 0.3) +  # Error bars
  scale_y_discrete(limits = rev(levels(results_filtered$dataset))) +
  #scale_color_manual(values = c("English" = "blue", "Chinese" = "red", "China" = "orange", "Non-China" = "skyblue")) +  # Custom colors
  scale_color_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_linetype_manual(name = "Prompt:", values = c("English" = "dashed", "Chinese" = "solid")) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12),
    axis.title = element_text(size = 12),
    axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
    axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
    legend.position = c(0.8,0.2),
    legend.text = element_text(size = 12)) +
  labs(y = "Name of LLM and Temperature", x = "Mean & %95CIs", title = "Refusal-to-Respond Rates by Prompt Language and Model Group")
dev.off()


#Save results for the timeline plot (Figure 6)
results_timeline <- results_filtered


##---------------------------Character Counts of Response-------------------------------------

# Initialize storage for results counts
results_counts <- data.frame(dataset = character(),
                             mean = numeric(),
                             std_error = numeric(),
                             stringsAsFactors = FALSE)

# Loop through the datasets
for (i in 1:length(datasets)) {
  data <- datasets[[i]]
  mean_val <- mean(data$mean_conv)
  std_error <- sd(data$mean_conv) / sqrt(nrow(data))
  
  # Store results
  results_counts <- rbind(results_counts, data.frame(dataset = dataset_names[i],
                                                     mean = mean_val,
                                                     std_error = std_error))
}

results_counts$dataset <- factor(results_counts$dataset, levels = dataset_names)

results_counts$type <- c("China:Ch","China:En","China:Ch","China:En","China:Ch","China:Ch","China:Ch",
                         "China:Ch","China:En","China:Ch","China:En", "Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En", "Non-China:Ch","Non-China:En")

##---------------------Remove English Prompts for Plots---------------------------
results_counts_filtered <- results_counts %>%
  filter(!grepl("\\(Eng\\)", dataset))
results_counts_filtered$dataset <- droplevels(results_counts_filtered$dataset)

results_counts_filtered$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", "Non-China", "Non-China", 
                                                                "Non-China", "Non-China", "Non-China", "Non-China")

#########Plot Figure 2#########
pdf("graph/countscombine.pdf",7,7)
ggplot(results_counts_filtered, aes(x = mean, y = dataset, fill = type)) +
  geom_bar(stat = "identity", orientation = "y") +
  geom_errorbar(aes(xmin = mean - std_error, xmax = mean + std_error), width = 0.2) +
  geom_text(aes(label = round(mean, 0)), hjust = -0.5) +  # Adjusted for horizontal orientation
  scale_fill_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_x_continuous(limits = c(0, 1100)) + 
  scale_y_discrete(limits = rev(levels(results_counts_filtered$dataset))) +  # Reverse the order of y-axis
  labs(title = "Average Counts of Characters",
       y = "Name of LLM and Temperature",
       x = NULL) +
  theme_bw() +
  theme(axis.text.y = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.65), "cm"),
        axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
        axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
        legend.position = "bottom",  # Move legend to bottom
        legend.direction = "horizontal",
        legend.text = element_text(size = 12) ) +  # Arrange legend horizontally
  guides(fill = guide_legend(nrow = 1))  # Arrange legend items in a single row
dev.off()

#------------Save Refusal Rate for the comparison between all questions against less-sensitive questions
cvprefusal <- results_filtered

#-------------------------------------------------------------------------------------------------------
#-----------------------------------------Placebo (Less Sensitive Questions)----------------------------
#-------------------------------------------------------------------------------------------------------

values_to_keep <- c("8", "24", "25", "29", "35", "40", "42", "48", "49", "50", "51", 
                    "59", "60", "71", "115", "116", "118", "119", "120", "121", 
                    "123", "124", "125", "126", "127", "129", "130", "135", "142", "144")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0", 
                   "gpt3.5_temp0", "gpt3.5_temp0.7", "gpt4_temp0", "gpt4_temp0.7", "llama2_temp0.6", "llama2_uncensored_temp1.0",
                   "deepseek_temp0", "deepseek_temp0.7", "gpt4o_temp0", "gpt4o_temp0.7", 
                   "baichuan13b_temp0.3eng", "chatglm_temp0.8eng", "gpt3.5_temp0eng", "gpt3.5_temp0.7eng", "gpt4_temp0eng", 
                   "gpt4_temp0.7eng", "llama2_temp0.6eng", "llama2_uncensored_temp1.0eng",
                   "deepseek_temp0eng", "deepseek_temp0.7eng", "gpt4o_temp0eng", "gpt4o_temp0.7eng")

# Loop over the data frames by their names and create new filtered data frames
for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  # Filter rows where the 'qn_num' column contains values refer to safe questions
  df_filtered <- df %>% filter(qn_num %in% values_to_keep)
  
  # Generate new data frame name
  new_df_name <- paste0(dataset_names[[i]], "_filtered")
  
  # Assign the filtered data frame to the new name
  assign(new_df_name, df_filtered)
}




##-------------------------------------------------------------------------------------------------
##---------------------------Compare models with Chinese input-------------------------------------
##-------------------------------------------------------------------------------------------------

datasets <- list(baichuan13b_temp0.3_filtered,baichuan13b_temp0.3eng_filtered, chatglm_temp0.8_filtered, chatglm_temp0.8eng_filtered, ernie_api_temp0.01_filtered,
                 ernie_api_temp0.95_filtered,ernie_api_temp1.0_filtered, deepseek_temp0_filtered, deepseek_temp0eng_filtered, deepseek_temp0.7_filtered, deepseek_temp0.7eng_filtered, 
                 llama2_temp0.6_filtered, llama2_temp0.6eng_filtered, llama2_uncensored_temp1.0_filtered, llama2_uncensored_temp1.0eng, 
                 gpt3.5_temp0_filtered, gpt3.5_temp0eng_filtered, gpt3.5_temp0.7_filtered, gpt3.5_temp0.7eng_filtered, gpt4_temp0_filtered, gpt4_temp0eng_filtered, gpt4_temp0.7_filtered, gpt4_temp0.7eng_filtered, 
                 gpt4o_temp0_filtered, gpt4o_temp0eng_filtered, gpt4o_temp0.7_filtered, gpt4o_temp0.7eng_filtered)

dataset_names <- c("BaiChuan: T0.3", "BaiChuan(Eng): T0.3", "ChatGLM: T0.8", "ChatGLM(Eng): T0.8", "Ernie Bot: T0.01", 
                   "Ernie Bot: T0.95", "Ernie Bot: T1.0", "DeepSeek: T0", "DeepSeek(Eng): T0", "DeepSeek: T0.7", "DeepSeek(Eng): T0.7", 
                   "Llama2: T0.6", "Llama2(Eng): T0.6", "Llama2-unc.: T1.0", "Llama2-unc.(Eng): T1.0", 
                   "GPT3.5: T0", "GPT3.5(Eng): T0", "GPT3.5: T0.7", "GPT3.5(Eng): T0.7",
                   "GPT4: T0", "GPT4(Eng): T0", "GPT4: T0.7", "GPT4(Eng): T0.7", 
                   "GPT4o: T0", "GPT4o(Eng): T0", "GPT4o: T0.7", "GPT4o(Eng): T0.7")

##---------------------------Refusal to Respond Rate-------------------------------------

# Initialize storage for results
results <- data.frame(dataset = character(),
                      mean = numeric(),
                      std_error = numeric(),
                      stringsAsFactors = FALSE)

# Loop through the datasets
for (i in 1:length(datasets)) {
  data <- datasets[[i]]
  mean_val <- mean(data$per_nonresponse)
  std_error <- sd(data$per_nonresponse) / sqrt(nrow(data))
  
  # Store results
  results <- rbind(results, data.frame(dataset = dataset_names[i],
                                       mean = mean_val,
                                       std_error = std_error))
}

results$dataset <- factor(results$dataset, levels = dataset_names)

results$type <- c("China:Ch","China:En","China:Ch","China:En","China:Ch","China:Ch","China:Ch",
                  "China:Ch","China:En","China:Ch","China:En", "Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                  "Non-China:Ch","Non-China:En", "Non-China:Ch","Non-China:En")

results_filtered <- results %>%
  filter(!grepl("\\(Eng\\)", dataset))
results_filtered$dataset <- droplevels(results_filtered$dataset)

results_filtered$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", "Non-China", "Non-China", 
                                                                                         "Non-China", "Non-China", "Non-China", "Non-China")




##---------------------------Character Counts of Response-------------------------------------

# Initialize storage for results counts
results_counts <- data.frame(dataset = character(),
                             mean = numeric(),
                             std_error = numeric(),
                             stringsAsFactors = FALSE)

# Loop through the datasets
for (i in 1:length(datasets)) {
  data <- datasets[[i]]
  mean_val <- mean(data$mean_conv)
  std_error <- sd(data$mean_conv) / sqrt(nrow(data))
  
  # Store results
  results_counts <- rbind(results_counts, data.frame(dataset = dataset_names[i],
                                                     mean = mean_val,
                                                     std_error = std_error))
}

results_counts$dataset <- factor(results_counts$dataset, levels = dataset_names)

results_counts$type <- c("China:Ch","China:En","China:Ch","China:En","China:Ch","China:Ch","China:Ch",
                         "China:Ch","China:En","China:Ch","China:En", "Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En", "Non-China:Ch","Non-China:En") 

##---------------------Remove English Prompts for Plots---------------------------
results_counts_filtered <- results_counts %>%
  filter(!grepl("\\(Eng\\)", dataset))
results_counts_filtered$dataset <- droplevels(results_counts_filtered$dataset)

results_counts_filtered$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", "Non-China", "Non-China", 
                                   "Non-China", "Non-China", "Non-China", "Non-China")

#########Plot Figure S2#########
pdf("graph/countscombineplacebo1.pdf",7,7)
ggplot(results_counts_filtered, aes(x = mean, y = dataset, fill = type)) +
  geom_bar(stat = "identity", orientation = "y") +
  geom_errorbar(aes(xmin = mean - std_error, xmax = mean + std_error), width = 0.2) +
  geom_text(aes(label = round(mean, 0)), hjust = -1.2) +  # Adjusted for horizontal orientation
  scale_fill_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_x_continuous(limits = c(0, 1400)) +
  scale_y_discrete(limits = rev(levels(results_counts_filtered$dataset))) +  # Reverse the order of y-axis
  labs(title = "Average Counts of Characters",
       y = "Name of LLM and Temperature",
       x = NULL) +
  theme_bw() +
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.65), "cm"),
        axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
        axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
        legend.position = "bottom",  # Move legend to bottom
        legend.direction = "horizontal",
        legend.text = element_text(size = 12) ) +  # Arrange legend horizontally
  guides(fill = guide_legend(nrow = 1))  # Arrange legend items in a single row
dev.off()




#------------Save Refusal Rate for the comparison between all questions against less-sensitive questions
cvprefusal$meanplacebo <- results_filtered$mean


cvpboth <-  cvprefusal %>%
  select(dataset, type, mean, meanplacebo) 


library(ggrepel)

#########Plot Figure 5#########
pdf("graph/cvprefusal.pdf",7,7)
ggplot(cvprefusal, aes(x = meanplacebo, y = mean, color = type)) +
  geom_point(size = 3.5, alpha = 0.7) +
  geom_text_repel(
    aes(label = dataset), 
    size = 4,
    force = 6,                  # Increase repulsion force
    max.overlaps = Inf,         # Ensure all labels are displayed
    box.padding = 1,          # Increase space around labels
    point.padding = 0.6,        # Space between points and labels
    segment.color = "gray",     # Gray lines linking labels to points
    segment.size = 0.4,         # Line size for connecting lines
    min.segment.length = 0,     # Always draw connecting lines
    direction = "both"          # Adjust both horizontal and vertical positioning
  ) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray37", size = 0.3) +
  scale_color_manual(name="LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  labs(
    title = "Refusal to Respond Rate (Percentage)",
    x = "30 Less Sensitive Questions",
    y = "All Questions",
    color = "Model Type"
  ) +
  scale_x_continuous(limits = c(0, 65)) +
  scale_y_continuous(limits = c(0, 65)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title = element_text(size = 12),
    legend.position = c(0.8,0.15),
    legend.text = element_text(size = 12) 
  )
dev.off()


##---------------------Refusal to respond rate over time for China and non-China models-------------------

# Add timing by mapping based on model names
results_timeline$timing <- NA  # Initialize column

# Assign dates based on the model name patterns
results_timeline$timing[grep("GPT3.5", results_timeline$dataset)] <- "July 26, 2023"
results_timeline$timing[grep("ChatGLM", results_timeline$dataset)] <- "July 31, 2023"
results_timeline$timing[grep("DeepSeek", results_timeline$dataset)] <- "June 15, 2025"
results_timeline$timing[grep("GPT4o", results_timeline$dataset)] <- "June 3, 2025"
results_timeline$timing[grep("Ernie", results_timeline$dataset)] <- "November 27, 2023"
results_timeline$timing[grep("Llama2", results_timeline$dataset)] <- "November 27, 2023"
results_timeline$timing[grep("BaiChuan", results_timeline$dataset)] <- "October 16, 2023"
results_timeline$timing[grep("GPT4:", results_timeline$dataset)] <- "October 8, 2023"

results_timeline <- results_timeline %>%
  mutate(timing_date = as.Date(timing, format = "%B %d, %Y"))
# Calculate limits (extending left by 30 days)
x_min <- min(results_timeline$timing_date) - 90  # Extend 30 days to the left
x_max <- max(results_timeline$timing_date) + 60 

pdf("graph/timecomparison.pdf", 7, 7)
ggplot(results_timeline, aes(x = timing_date, y = mean, group = type, color = type)) +
  geom_line(aes(linetype = type), size = 0.5) +  # Lines by group
  geom_point(size = 3) +  # Points for each model
  geom_errorbar(aes(ymin = mean - std_error, ymax = mean + std_error), width = 15, size = 0.8) +  # Error bars
  geom_text_repel(
    aes(label = dataset), 
    size = 3.5,
    force = 6,                  # Increase repulsion force
    max.overlaps = Inf,         # Ensure all labels are displayed
    box.padding = 1.3,          # Increase space around labels
    point.padding = 0.9,        # Space between points and labels
    segment.color = "gray",     # Gray lines linking labels to points
    segment.size = 0.4,         # Line size for connecting lines
    min.segment.length = 0,     # Always draw connecting lines
    direction = "both"          # Adjust both horizontal and vertical positioning
  ) +
  scale_x_date(date_labels = "%b %d, %Y", date_breaks = "3 month", limits = c(x_min, x_max)) +
  geom_vline(xintercept = as.Date("2023-08-15"), linetype = "dashed", color = "black", size = 0.5) +  # Vertical line
  annotate(
    "text",
    x = as.Date("2023-08-15"),
    y = max(results_timeline$mean) - 12,  # Adjust Y position as needed
    label = "Interim Measures for Generative AI",
    angle = 90,
    vjust = -0.5,
    size = 4
  ) +
  scale_color_manual(name = "LLM Group", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_linetype_manual(name = "LLM Group", values = c("China" = "solid", "Non-China" = "dashed")) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title = element_text(size = 14),
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    legend.position = c(0.8, 0.8),
    legend.text = element_text(size = 12)
  ) +
  labs(
    y = "Mean (%) with 95% CIs",
    x = "Timeline of Model Release",
    title = "Refusal-to-Respond Rates by Model Timeline and Group"
  )
dev.off()


#-------------------------------------------------------------------------------------------------------
#-----------------------------------------Counts of Responses after Excluding Refusals------------------
#-------------------------------------------------------------------------------------------------------
#Chinese input models
baichuan13b_temp0.3 <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_chn.csv")
chatglm_temp0.8 <- read.csv("data/intermediate/censorship_chatglm_temp0.8_chn.csv")
ernie_api_temp0.01 <- read.csv("data/intermediate/censorship_ernie_api_temp0.01_chn.csv")
ernie_api_temp0.95 <- read.csv("data/intermediate/censorship_ernie_api_temp0.95_chn.csv")
ernie_api_temp1.0 <- read.csv("data/intermediate/censorship_ernie_api_temp1.0_chn.csv")
gpt3.5_temp0 <- read.csv("data/intermediate/censorship_gpt3.5_temp0_chn.csv")
gpt3.5_temp0.7 <- read.csv("data/intermediate/censorship_gpt3.5_temp0.7_chn.csv")
gpt4_temp0 <- read.csv("data/intermediate/censorship_gpt4_temp0_chn.csv")
gpt4_temp0.7 <- read.csv("data/intermediate/censorship_gpt4_temp0.7_chn.csv")
llama2_temp0.6 <- read.csv("data/intermediate/censorship_llama2_temp0.6_chnengtrans.csv")
llama2_uncensored_temp1.0 <- read.csv("data/intermediate/censorship_llama2_uncensored_temp1.0_chnengtrans.csv")
deepseek_temp0 <- read.csv("data/intermediate/censorship_deepseek_temp0_chn.csv")
deepseek_temp0.7 <- read.csv("data/intermediate/censorship_deepseek_temp0.7_chn.csv")
gpt4o_temp0 <- read.csv("data/intermediate/censorship_gpt4o_temp0_chn.csv")
gpt4o_temp0.7 <- read.csv("data/intermediate/censorship_gpt4o_temp0.7_chn.csv")



#English input models (translated into Chinese)
baichuan13b_temp0.3eng <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_engtrans.csv")
chatglm_temp0.8eng <- read.csv("data/intermediate/censorship_chatglm_temp0.8_engtrans.csv")
gpt3.5_temp0eng <- read.csv("data/intermediate/censorship_gpt3.5_temp0_engtrans.csv")
gpt3.5_temp0.7eng <- read.csv("data/intermediate/censorship_gpt3.5_temp0.7_engtrans.csv")
gpt4_temp0eng <- read.csv("data/intermediate/censorship_gpt4_temp0_engtrans.csv")
gpt4_temp0.7eng <- read.csv("data/intermediate/censorship_gpt4_temp0.7_engtrans.csv")
llama2_temp0.6eng <- read.csv("data/intermediate/censorship_llama2_temp0.6_engtrans.csv")
llama2_uncensored_temp1.0eng <- read.csv("data/intermediate/censorship_llama2_uncensored_temp1.0_engtrans.csv")
deepseek_temp0eng <- read.csv("data/intermediate/censorship_deepseek_temp0_engtrans.csv")
deepseek_temp0.7eng <- read.csv("data/intermediate/censorship_deepseek_temp0.7_engtrans.csv")
gpt4o_temp0eng <- read.csv("data/intermediate/censorship_gpt4o_temp0_engtrans.csv")
gpt4o_temp0.7eng <- read.csv("data/intermediate/censorship_gpt4o_temp0.7_engtrans.csv")

dataset_names <- c("baichuan13b_temp0.3", "baichuan13b_temp0.3eng", "chatglm_temp0.8", "chatglm_temp0.8eng", "ernie_api_temp0.01",
                 "ernie_api_temp0.95", "ernie_api_temp1.0", "deepseek_temp0", "deepseek_temp0eng", "deepseek_temp0.7", "deepseek_temp0.7eng", 
                 "llama2_temp0.6", "llama2_temp0.6eng", "llama2_uncensored_temp1.0", "llama2_uncensored_temp1.0eng",
                 "gpt3.5_temp0", "gpt3.5_temp0eng", "gpt3.5_temp0.7", "gpt3.5_temp0.7eng", "gpt4_temp0", "gpt4_temp0eng", "gpt4_temp0.7", "gpt4_temp0.7eng", 
                 "gpt4o_temp0", "gpt4o_temp0eng", "gpt4o_temp0.7", "gpt4o_temp0.7eng")

#--------------------------------------Assign NAs to non-responses------------------------------------------
for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  # Apply the transformation to generate regex patterns from `conv_nonresponse`
  df <- df %>%
    rowwise() %>%
    mutate(across(
      starts_with("conv") & !all_of("conv_nonresponse"), # Apply only to columns that start with "conv" excluding "conv_nonresponse"
      ~ {
        # Split `conv_nonresponse` into individual patterns for this row
        conv_nonresponse_list <- str_split(conv_nonresponse, ", ")[[1]]
        
        # Remove "_len" from the current column name before matching
        column_name_clean <- str_replace(cur_column(), "_len$", "")
        
        # Check if the cleaned column name matches any entry in `conv_nonresponse_list`
        if (any(column_name_clean %in% conv_nonresponse_list)) {
          NA  # Set to NA if there's a match
        } else {
          .   # Otherwise, keep the original value
        }
      }
    )) %>%
    ungroup()
  
  # Assign the modified dataset back to a variable in the environment with the original name
  assign(dataset_names[[i]], df)
}



#--------------------------------------Calculate Row Means------------------------------------------
for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  df <- df %>%
    rename(chrconv_nonresponse=conv_nonresponse)
  df <- df %>%
    mutate(across(starts_with("conv"), as.numeric))
  
  # Calculate the sum of columns starting with "conv" for each row
  df$mean_conv <- rowMeans(df %>% select(starts_with("conv")), na.rm = TRUE)
  
  # Replace NaN values with NA
  df$mean_conv[is.nan(df$mean_conv)] <- NA
  
  # Assign the modified data frame back to its original name
  assign(dataset_names[[i]], df)
}


# Initialize storage for results counts
results_counts <- data.frame(dataset = character(),
                             mean = numeric(),
                             std_error = numeric(),
                             stringsAsFactors = FALSE)
#Check colmeans
mean(ernie_api_temp0.01$mean_conv, na.rm = TRUE)
mean(baichuan13b_temp0.3$mean_conv, na.rm = TRUE)
# mean(datasets[[1]]$mean_conv, na.rm = TRUE)
# mean(datasets[[6]]$mean_conv, na.rm = TRUE)


#--------------------------------------Calculate Column Means------------------------------------------
for (i in 1:length(dataset_names)) {
  # Dynamically get the data frame by its name
  data <- get(dataset_names[i])
  
  # Check if `mean_conv` exists in the dataset
  if ("mean_conv" %in% names(data)) {
    # Calculate the mean of `mean_conv`, excluding NA values
    mean_val <- mean(data$mean_conv, na.rm = TRUE)
    
    # Calculate the standard error of `mean_conv`, excluding NA values
    std_error <- sd(data$mean_conv, na.rm = TRUE) / sqrt(sum(!is.na(data$mean_conv)))
    
    # Store the results in results_counts
    results_counts <- rbind(results_counts, data.frame(dataset = dataset_names[i],
                                                       mean = mean_val,
                                                       std_error = std_error))
  } else {
    # If mean_conv does not exist, store NA for mean and std_error
    results_counts <- rbind(results_counts, data.frame(dataset = dataset_names[i],
                                                       mean = NA,
                                                       std_error = NA))
  }
}

y_labels <- c("BaiChuan: T0.3", "BaiChuan(Eng): T0.3", "ChatGLM: T0.8", "ChatGLM(Eng): T0.8", "Ernie Bot: T0.01", 
              "Ernie Bot: T0.95", "Ernie Bot: T1.0", "DeepSeek: T0", "DeepSeek(Eng): T0", "DeepSeek: T0.7", "DeepSeek(Eng): T0.7", 
              "Llama2: T0.6", "Llama2(Eng): T0.6", "Llama2-unc.: T1.0", "Llama2-unc.(Eng): T1.0", 
              "GPT3.5: T0", "GPT3.5(Eng): T0", "GPT3.5: T0.7", "GPT3.5(Eng): T0.7",
              "GPT4: T0", "GPT4(Eng): T0", "GPT4: T0.7", "GPT4(Eng): T0.7", 
              "GPT4o: T0", "GPT4o(Eng): T0", "GPT4o: T0.7", "GPT4o(Eng): T0.7") 

results_counts$dataset <- factor(y_labels, levels = y_labels)

results_counts$type <- c("China:Ch","China:En","China:Ch","China:En","China:Ch","China:Ch","China:Ch",
                         "China:Ch","China:En","China:Ch","China:En", "Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En","Non-China:Ch","Non-China:En",
                         "Non-China:Ch","Non-China:En", "Non-China:Ch","Non-China:En") 



##---------------------Remove English Prompts for Plots---------------------------
results_counts_filtered <- results_counts %>%
  filter(!grepl("\\(Eng\\)", dataset))
results_counts_filtered$dataset <- droplevels(results_counts_filtered$dataset)

results_counts_filtered$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", "Non-China", "Non-China", 
                                   "Non-China", "Non-China", "Non-China", "Non-China")

#########Plot Figure S1#########
pdf("graph/countscombinenoncensor.pdf",7,7)
ggplot(results_counts_filtered, aes(x = mean, y = dataset, fill = type)) +
  geom_bar(stat = "identity", orientation = "y") +
  geom_errorbar(aes(xmin = mean - std_error, xmax = mean + std_error), width = 0.2) +
  geom_text(aes(label = round(mean, 0)), hjust = -0.9) +  # Adjusted for horizontal orientation
  scale_fill_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_x_continuous(limits = c(0, 1400)) +
  scale_y_discrete(limits = rev(levels(results_counts_filtered$dataset))) +
  #scale_y_discrete(limits = rev(levels(results_counts$dataset)), labels = y_labels) +  # Reverse the order of y-axis and add custom labels
  labs(title = "Average Counts of Characters",
       y = "Name of LLM and Temperature",
       x = NULL) +
  theme_bw() +
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.65), "cm"),
        #axis.text.y = element_text(angle = 0, vjust = 1, hjust = 1),
        axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
        axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
        legend.position = "bottom",  # Move legend to bottom
        legend.direction = "horizontal", # Arrange legend horizontally
        legend.text = element_text(size = 12))

dev.off()


#---------------------------------------------------------------------------------
#-----------------------------------------Accuracy--------------------------------
#---------------------------------------------------------------------------------

#Chinese input models
#Remove Llama models because the responses are translated from English
baichuan13b_temp0.3 <- read.csv("data/accuracy/censorship_baichuan13b_temp0.3_chn_combined_accuracy.csv")
chatglm_temp0.8 <- read.csv("data/accuracy/censorship_chatglm_temp0.8_chn_combined_accuracy.csv")
ernie_api_temp0.01 <- read.csv("data/accuracy/censorship_ernie_api_temp0.01_chn_combined_accuracy.csv")
ernie_api_temp0.95 <- read.csv("data/accuracy/censorship_ernie_api_temp0.95_chn_combined_accuracy.csv")
ernie_api_temp1.0 <- read.csv("data/accuracy/censorship_ernie_api_temp1.0_chn_combined_accuracy.csv")
gpt3.5_temp0 <- read.csv("data/accuracy/censorship_gpt3.5_temp0_chn_combined_accuracy.csv")
gpt3.5_temp0.7 <- read.csv("data/accuracy/censorship_gpt3.5_temp0.7_chn_combined_accuracy.csv")
gpt4_temp0 <- read.csv("data/accuracy/censorship_gpt4_temp0_chn_combined_accuracy.csv")
gpt4_temp0.7 <- read.csv("data/accuracy/censorship_gpt4_temp0.7_chn_combined_accuracy.csv")
#llama2_temp0.6 <- read.csv("data/accuracy/censorship_llama2_temp0.6_chnengtrans_combined_accuracy.csv")
#llama2_uncensored_temp1.0 <- read.csv("data/accuracy/censorship_llama2_uncensored_temp1.0_chnengtrans_combined_accuracy.csv")
deepseek_temp0 <- read.csv("data/accuracy/censorship_deepseek_temp0_chn_combined_accuracy.csv")
deepseek_temp0.7 <- read.csv("data/accuracy/censorship_deepseek_temp0.7_chn_combined_accuracy.csv")
gpt4o_temp0 <- read.csv("data/accuracy/censorship_gpt4o_temp0_chn_combined_accuracy.csv")
gpt4o_temp0.7 <- read.csv("data/accuracy/censorship_gpt4o_temp0.7_chn_combined_accuracy.csv")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0", 
                   "gpt3.5_temp0", "gpt3.5_temp0.7", "gpt4_temp0", "gpt4_temp0.7",
                   "deepseek_temp0", "deepseek_temp0.7", "gpt4o_temp0", "gpt4o_temp0.7")

for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  df$nonresp_per <- df$nonresp_per*100
  df$inaccurate_per <- df$inaccurate_per*100
  df$mixaccu_per <- 100- df$nonresp_per - df$inaccurate_per
  assign(dataset_names[[i]], df)
}

##-------------------------------------------------------------------------------------------------
##---------------------------Compare models with Chinese input-------------------------------------
##-------------------------------------------------------------------------------------------------
datasets <- list(baichuan13b_temp0.3, chatglm_temp0.8, ernie_api_temp0.01, ernie_api_temp0.95,ernie_api_temp1.0, 
                 deepseek_temp0, deepseek_temp0.7,
                 gpt3.5_temp0, gpt3.5_temp0.7, gpt4_temp0, gpt4_temp0.7,
                 gpt4o_temp0, gpt4o_temp0.7)

dataset_names <- c("BaiChuan: T0.3", "ChatGLM: T0.8", "Ernie Bot: T0.01", "Ernie Bot: T0.95", "Ernie Bot: T1.0", 
                   "DeepSeek: T0", "DeepSeek: T0.7",
                   "GPT3.5: T0", "GPT3.5: T0.7", 
                   "GPT4: T0", "GPT4: T0.7", "GPT4o: T0", "GPT4o: T0.7")

# Initialize storage for results
results <- data.frame(dataset = character(),
                      mean = numeric(),
                      std_error = numeric(),
                      stringsAsFactors = FALSE)

# Loop through the datasets
for (i in 1:length(datasets)) {
  data <- datasets[[i]]
  mean_val <- mean(data$inaccurate_per)
  std_error <- sd(data$inaccurate_per) / sqrt(nrow(data))
  
  # Store results
  results <- rbind(results, data.frame(dataset = dataset_names[i],
                                       mean = mean_val,
                                       std_error = std_error))
}


results$dataset <- factor(results$dataset, levels = dataset_names)

results$type <- c( "China", "China", "China", "China", "China", "China", "China", "Non-China", "Non-China", 
                   "Non-China", "Non-China", "Non-China", "Non-China")


#########Plot Figure 3#########
pdf("graph/inaccuracy.pdf",7,7)
ggplot(results, aes(x = mean, y = dataset, fill = type)) +
  geom_bar(stat = "identity", orientation = "y") +
  geom_errorbar(aes(xmin = mean - std_error, xmax = mean + std_error), width = 0.2) +
  geom_text(aes(label = sprintf("%.2f", mean)), hjust = -0.9) +
  scale_fill_manual(name = "LLM:", values = c("China" = "orange", "Non-China" = "skyblue")) +
  scale_x_continuous(limits = c(0, 50)) + 
  scale_y_discrete(limits = rev(levels(results$dataset))) +
  labs(title = "Complete Inaccuracy Rate (Percentage)",
       y = "Name of LLM and Temperature",
       x = NULL) +
  theme_bw() +
  theme(axis.text.y = element_text(angle = 0, vjust = 1, hjust = 1)) +
  theme(plot.margin = unit(c(0.5,0.5,0.5,0.65), "cm"),
        axis.text.x = element_text(size = 12),  # Increase font size of x-axis labels
        axis.text.y = element_text(size = 12, vjust = 0.5),  # Increase font size of y-axis labels
        legend.position = "bottom",  # Move legend to bottom
        legend.direction = "horizontal",
        legend.text = element_text(size = 12)) +  # Arrange legend horizontally
  guides(fill = guide_legend(nrow = 1))  # Arrange legend items in a single row
dev.off()


#--------------------------------------------------------------------------------
#-------------------------------Regression (Table S1)----------------------------
#--------------------------------------------------------------------------------

#-------------------------Chinese input models: Counts---------------------------
baichuan13b_temp0.3 <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_chn.csv")
chatglm_temp0.8 <- read.csv("data/intermediate/censorship_chatglm_temp0.8_chn.csv")
ernie_api_temp0.01 <- read.csv("data/intermediate/censorship_ernie_api_temp0.01_chn.csv")
ernie_api_temp0.95 <- read.csv("data/intermediate/censorship_ernie_api_temp0.95_chn.csv")
ernie_api_temp1.0 <- read.csv("data/intermediate/censorship_ernie_api_temp1.0_chn.csv")
deepseek_temp0 <- read.csv("data/intermediate/censorship_deepseek_temp0_chn.csv")
deepseek_temp0.7 <- read.csv("data/intermediate/censorship_deepseek_temp0.7_chn.csv")
gpt3.5_temp0 <- read.csv("data/intermediate/censorship_gpt3.5_temp0_chn.csv")
gpt3.5_temp0.7 <- read.csv("data/intermediate/censorship_gpt3.5_temp0.7_chn.csv")
gpt4_temp0 <- read.csv("data/intermediate/censorship_gpt4_temp0_chn.csv")
gpt4_temp0.7 <- read.csv("data/intermediate/censorship_gpt4_temp0.7_chn.csv")
llama2_temp0.6 <- read.csv("data/intermediate/censorship_llama2_temp0.6_chnengtrans.csv")
llama2_uncensored_temp1.0 <- read.csv("data/intermediate/censorship_llama2_uncensored_temp1.0_chnengtrans.csv")
gpt4o_temp0 <- read.csv("data/intermediate/censorship_gpt4o_temp0_chn.csv")
gpt4o_temp0.7 <- read.csv("data/intermediate/censorship_gpt4o_temp0.7_chn.csv")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0",
                   "deepseek_temp0", "deepseek_temp0.7", "llama2_temp0.6", "llama2_uncensored_temp1.0",
                   "gpt3.5_temp0", "gpt3.5_temp0.7", "gpt4_temp0", "gpt4_temp0.7",
                   "gpt4o_temp0", "gpt4o_temp0.7")


#Pre-processing: counts, etc.
mode_function <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  conv_cols <- df %>%
    select(starts_with("conv")) %>%
    select(where(is.numeric))
  
  # Calculate the sum of columns starting with "conv" for each row
  df$mean_conv <- rowMeans(conv_cols, na.rm = TRUE) 
  
  # Calculate the maximum of columns starting with "conv" for each row
  df$max_conv <- do.call(pmax, c(conv_cols, na.rm = TRUE))
  
  # Calculate the counts of the most frequent responses of columns starting with "conv" for each row
  df$mode_conv <- apply(conv_cols, 1, mode_function)
  
  # Assign the modified data frame back to its original name
  assign(dataset_names[[i]], df)
}

# Initialize empty list to store data
combined_data <- list()

# Loop through datasets
for (name in dataset_names) {
  # Load dataset
  df <- get(name)
  
  # Select required columns and add dataset name
  temp_df <- df %>%
    select(qn_num, prompt_txt, per_nonresponse, mean_conv) %>%
    mutate(dataset = name)
  
  # Store in the list
  combined_data[[name]] <- temp_df
}

# Combine all into one data frame
final_df <- do.call(rbind, combined_data)


#------------------------------Chinese input models: Inaccuracy------------------
#Remove Llama models because the responses are translated from English
baichuan13b_temp0.3 <- read.csv("data/accuracy/censorship_baichuan13b_temp0.3_chn_combined_accuracy.csv")
chatglm_temp0.8 <- read.csv("data/accuracy/censorship_chatglm_temp0.8_chn_combined_accuracy.csv")
ernie_api_temp0.01 <- read.csv("data/accuracy/censorship_ernie_api_temp0.01_chn_combined_accuracy.csv")
ernie_api_temp0.95 <- read.csv("data/accuracy/censorship_ernie_api_temp0.95_chn_combined_accuracy.csv")
ernie_api_temp1.0 <- read.csv("data/accuracy/censorship_ernie_api_temp1.0_chn_combined_accuracy.csv")
gpt3.5_temp0 <- read.csv("data/accuracy/censorship_gpt3.5_temp0_chn_combined_accuracy.csv")
gpt3.5_temp0.7 <- read.csv("data/accuracy/censorship_gpt3.5_temp0.7_chn_combined_accuracy.csv")
gpt4_temp0 <- read.csv("data/accuracy/censorship_gpt4_temp0_chn_combined_accuracy.csv")
gpt4_temp0.7 <- read.csv("data/accuracy/censorship_gpt4_temp0.7_chn_combined_accuracy.csv")
#llama2_temp0.6 <- read.csv("data/accuracy/censorship_llama2_temp0.6_chnengtrans_combined_accuracy.csv")
#llama2_uncensored_temp1.0 <- read.csv("data/accuracy/censorship_llama2_uncensored_temp1.0_chnengtrans_combined_accuracy.csv")
deepseek_temp0 <- read.csv("data/accuracy/censorship_deepseek_temp0_chn_combined_accuracy.csv")
deepseek_temp0.7 <- read.csv("data/accuracy/censorship_deepseek_temp0.7_chn_combined_accuracy.csv")
gpt4o_temp0 <- read.csv("data/accuracy/censorship_gpt4o_temp0_chn_combined_accuracy.csv")
gpt4o_temp0.7 <- read.csv("data/accuracy/censorship_gpt4o_temp0.7_chn_combined_accuracy.csv")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0", 
                   "deepseek_temp0", "deepseek_temp0.7", 
                   "gpt3.5_temp0", "gpt3.5_temp0.7", "gpt4_temp0", "gpt4_temp0.7", "gpt4o_temp0", "gpt4o_temp0.7")

for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  df$nonresp_per <- df$nonresp_per*100
  df$inaccurate_per <- df$inaccurate_per*100
  df$mixaccu_per <- 100- df$nonresp_per - df$inaccurate_per
  assign(dataset_names[[i]], df)
}

combined_data <- list()

# Loop through and merge inaccurate_per
for (name in dataset_names) {
  # Load dataset
  df_inacc <- get(name)
  
  # Generate the common identifier qn_num
  df_inacc <- df_inacc %>% mutate(qn_num = qns_idx + 1)
  
  # Select necessary columns
  temp_df <- df_inacc %>%
    select(qn_num, inaccurate_per) %>%
    mutate(dataset = name)
  
  # Store in the list
  combined_data[[name]] <- temp_df
}


 
final_df_inacc <- do.call(rbind, combined_data)
  # Merge with final_df by qn_num, and dataset
  final_df <- final_df %>%
    left_join(final_df_inacc, by = c("qn_num", "dataset"))

final_df$dataset <- factor(final_df$dataset, levels = c("gpt3.5_temp0", setdiff(unique(final_df$dataset), "gpt3.5_temp0")))
model_nonresp <- lm(per_nonresponse ~ dataset, data = final_df)
summary(model_nonresp)

model_count <- lm(mean_conv ~ dataset, data = final_df)
summary(model_count)

model_inacc <- lm(inaccurate_per ~ dataset, data = final_df)
summary(model_inacc)



covariate_labels <- c(
  "BaiChuan: T0.3",     # baichuan13b_temp0.3
  "ChatGLM: T0.8",      # chatglm_temp0.8
  "Ernie Bot: T0.01",   # ernie_api_temp0.01
  "Ernie Bot: T0.95",   # ernie_api_temp0.95
  "Ernie Bot: T1.0",    # ernie_api_temp1.0
  "DeepSeek: T0",       # deepseek_temp0
  "DeepSeek: T0.7",     # deepseek_temp0.7
  "Llama2: T0.6",       # llama2_temp0.6
  "Llama2-unc.: T1.0",  # llama2_uncensored_temp1.0
  "GPT3.5: T0.7",       # gpt3.5_temp0.7
  "GPT4: T0",           # gpt4_temp0
  "GPT4: T0.7",         # gpt4_temp0.7
  "GPT4o: T0",          # gpt4o_temp0
  "GPT4o: T0.7"         # gpt4o_temp0.7
)


#########Export Table S1#########
regtab <- stargazer(model_nonresp, model_count, model_inacc,
          title = "Regression Results",
          dep.var.labels = c("Non-Response", "Mean Conv.", "Inaccuracy"),
          column.labels = c("Non-Response", "Mean Conv.", "Inaccuracy"),
          covariate.labels = covariate_labels,
          align = TRUE,
          type = "latex")

writeLines(regtab, con = "table/regression_results.txt")
#---------------------------------------------------------------------------------------------------------------------
#---------------------------------------------Censorship by Types (Figure S3)-----------------------------------------
#---------------------------------------------------------------------------------------------------------------------

#Chinese input models
baichuan13b_temp0.3 <- read.csv("data/intermediate/censorship_baichuan13b_temp0.3_chn.csv")
chatglm_temp0.8 <- read.csv("data/intermediate/censorship_chatglm_temp0.8_chn.csv")
ernie_api_temp0.01 <- read.csv("data/intermediate/censorship_ernie_api_temp0.01_chn.csv")
ernie_api_temp0.95 <- read.csv("data/intermediate/censorship_ernie_api_temp0.95_chn.csv")
ernie_api_temp1.0 <- read.csv("data/intermediate/censorship_ernie_api_temp1.0_chn.csv")
deepseek_temp0 <- read.csv("data/intermediate/censorship_deepseek_temp0_chn.csv")
deepseek_temp0.7 <- read.csv("data/intermediate/censorship_deepseek_temp0.7_chn.csv")



infotype <- read.csv("data/infotype2.csv")

dataset_names <- c("baichuan13b_temp0.3", "chatglm_temp0.8", "ernie_api_temp0.01", "ernie_api_temp0.95", "ernie_api_temp1.0", 
                   "deepseek_temp0", "deepseek_temp0.7")


#Pre-processing: counts, etc.
mode_function <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  conv_cols <- df %>%
    select(starts_with("conv")) %>%
    select(where(is.numeric))
  
  # Calculate the sum of columns starting with "conv" for each row
  df$mean_conv <- rowMeans(conv_cols, na.rm = TRUE) 
  
  # Calculate the maximum of columns starting with "conv" for each row
  df$max_conv <- do.call(pmax, c(conv_cols, na.rm = TRUE))
  
  # Calculate the counts of the most frequent responses of columns starting with "conv" for each row
  df$mode_conv <- apply(conv_cols, 1, mode_function)
  
  # Merge with infotype data by the first column (assuming it's a unique ID)
  df <- merge(df, infotype, by = names(df)[1], all.x = TRUE)
  
  # Assign the modified data frame back to its original name
  assign(dataset_names[[i]], df)
}



#------------------------------------------------------------------China Models----------------------------------------
#dataset_names <- c("baichuan13b_temp0.3", "baichuan13b_temp0.3eng","ernie_api_temp0.01", 
#                   "ernie_api_temp0.95", "ernie_api_temp1.0", "chatglm_temp0.8", "chatglm_temp0.8eng")

# Initialize an empty data frame to store combined data
combined_df <- data.frame()


# Loop through each dataset to combine the data
for (i in 1:length(dataset_names)) {
  # Get the data frame
  df <- get(dataset_names[[i]])
  
  # Calculate mean value by categorical variable
  mean_df <- df %>%
    group_by(type2) %>%
    summarise(mean_value = mean(per_nonresponse, na.rm = TRUE))  # Replace 'per_nonresponse' with your numeric variable
  
  # Add a column for the dataset name
  mean_df$dataset <- dataset_names[[i]]
  
  # Combine the data frames
  combined_df <- rbind(combined_df, mean_df)
}

# Calculate the maximum value and set the upper limit
max_value <- max(combined_df$mean_value)
upper_limit <- max_value + 0.05 * max_value  # Add 5% margin above the max value

combined_df$dataset <- factor(combined_df$dataset, levels = dataset_names)

# Group by dataset
# Reorder the type2 factor levels for baichuan13b_temp0.3 based on mean_value
combined_df <- combined_df %>%
  group_by(dataset) %>%
  mutate(type2 = if_else(dataset == "baichuan13b_temp0.3",
                         factor(type2, levels = type2[order(-mean_value)]),
                         factor(type2, levels = type2))) %>%
  mutate(dataset = recode(dataset, 
                          "baichuan13b_temp0.3" = "BaiChuan: T0.3",
                          "ernie_api_temp0.01" = "Ernie Bot: T0.01",
                          "ernie_api_temp0.95" = "Ernie Bot: T0.95",
                          "ernie_api_temp1.0" = "Ernie Bot: T1.0",
                          "chatglm_temp0.8" = "ChatGLM: T0.8",
                          "deepseek_temp0" = "DeepSeek: T0",
                          "deepseek_temp0.7" = "DeepSeek: T0.7"))


# Define custom colors
custom_colors <- c("BaiChuan: T0.3" = "#8DD3C7",
                   "ChatGLM: T0.8" = "#80B1D3",
                   "Ernie Bot: T0.01" = "#fcafa7",
                   "Ernie Bot: T0.95" = "#fa978c",
                   "Ernie Bot: T1.0" = "#FB8072",
                   "DeepSeek: T0" = "#CCEBC5",
                   "DeepSeek: T0.7" = "#abd6a1"
)


#########Plot Figure S3#########
pdf("graph/censor_type_chn.pdf", width = 10, height = 7)

p <- ggplot(combined_df, aes(x = type2, y = mean_value, fill = dataset)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(limits=c(0, upper_limit)) +
  scale_fill_manual(name = "LLM & Prompt", values = custom_colors) + # Apply color palatte 
  labs(title = "Types of Info. Censored by China Models",
       x = "",
       y = "Percentage of Refusal to Respond") +
  theme_bw() +
  theme(
    text = element_text(size=15), 
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 1), "cm"),
    legend.position = c(0.85, 0.75)
  )

print(p)
dev.off()  # Close the PDF device



